#include <iostream>
#include "tinyxml2.h"
#include <regex>
#include <string>
#include <fstream>
#include <vector>

using namespace tinyxml2;
using std::cout;
using std::endl;
using std::cerr;
using std::regex;
using std::vector;
using std::string;
using std::ofstream;

// One article extracted from an RSS <item> element.
struct RSSITEAM {
    string _title;        // text of the item's <title> child
    string _link;         // text of the item's <link> child
    string _description;  // text of <description>, stored by RSS::read after tag stripping
    string _content;      // text of <content:encoded>, stored by RSS::read after tag stripping
};

class RSS{
public:
    RSS(size_t capa){
        _rss.reserve(capa);
    }

    //读文件
    void read(const string &filename){
        XMLDocument doc;
        doc.LoadFile(filename.c_str());
        if(doc.ErrorID()){
            cerr<<"error"<<endl;
            return;
        }

        XMLElement *pNode=
            doc.FirstChildElement("rss")->
            FirstChildElement("channel")->FirstChildElement("item");
        while(pNode){
            string title = pNode->FirstChildElement("title")->GetText();
            string link = pNode->FirstChildElement("link")->GetText();
            string description = pNode->FirstChildElement("description")->GetText();
            string content = pNode->FirstChildElement("content:encoded")->GetText();

            regex reg("<[^>]+>");
            description = regex_replace(description,reg," ");
            content = regex_replace(content,reg," ");
        
            
            RSSITEAM rssItem;
            rssItem._title=title;
            rssItem._link=link;
            rssItem._description=description;
            rssItem._content=content;

            _rss.push_back(rssItem);

            //处理下一篇
            pNode = pNode->NextSiblingElement("item");
        }
    }

    void store(const string &filename){
        ofstream ofs(filename);
        if(!ofs){
            cerr<<"open"<<filename<<"failed"<<endl;
            return;
        }
        for(size_t idx=0;idx!=_rss.size();++idx){
            ofs<<"<doc>\n"
                <<' '<<"<docid>"<<idx+1<<"</docid>\n"
                <<' '<<"<title>"<<_rss[idx]._title<<"</title>\n"
                <<' '<<"<link>"<<_rss[idx]._link<<"</link>\n"
                <<' '<<"<description"<<_rss[idx]._description<<"</descriptipn>\n"
                <<' '<<"<content>"<<_rss[idx]._content<<"</content>\n"
                "</doc>";
            ofs<<'\n'<<'\n';
        }
        ofs.close();
    }

private:
    vector<RSSITEAM> _rss;
};


int main()
{   
    RSS rss(4000);
    rss.read("coolshell.xml");
    rss.store("pagelib.dat");
    return 0;
}

